1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package com.google.common.base;
18
19 import com.google.common.annotations.GwtCompatible;
20
21 import junit.framework.TestCase;
22
23
24
25
26
27
28
29
30 @GwtCompatible(emulated = true)
31 public class Utf8Test extends TestCase {
32 public void testEncodedLength_validStrings() {
33 assertEquals(0, Utf8.encodedLength(""));
34 assertEquals(11, Utf8.encodedLength("Hello world"));
35 assertEquals(8, Utf8.encodedLength("Résumé"));
36 assertEquals(461, Utf8.encodedLength("威廉·莎士比亞(William Shakespeare,"
37 + "1564年4月26號—1616年4月23號[1])係隻英國嗰演員、劇作家同詩人,"
38 + "有時間佢簡稱莎翁;中國清末民初哈拕翻譯做舌克斯毕、沙斯皮耳、筛斯比耳、"
39 + "莎基斯庇尔、索士比尔、夏克思芘尔、希哀苦皮阿、叶斯壁、沙克皮尔、"
40 + "狹斯丕爾。[2]莎士比亞編寫過好多作品,佢嗰劇作響西洋文學好有影響,"
41 + "哈都拕人翻譯做好多話。"));
42
43 assertEquals(4, Utf8.encodedLength(
44 newString(Character.MIN_HIGH_SURROGATE, Character.MIN_LOW_SURROGATE)));
45 }
46
47 public void testEncodedLength_invalidStrings() {
48 testEncodedLengthFails(newString(Character.MIN_HIGH_SURROGATE), 0);
49 testEncodedLengthFails("foobar" + newString(Character.MIN_HIGH_SURROGATE), 6);
50 testEncodedLengthFails(newString(Character.MIN_LOW_SURROGATE), 0);
51 testEncodedLengthFails("foobar" + newString(Character.MIN_LOW_SURROGATE), 6);
52 testEncodedLengthFails(
53 newString(
54 Character.MIN_HIGH_SURROGATE,
55 Character.MIN_HIGH_SURROGATE), 0);
56 }
57
58 private static void testEncodedLengthFails(String invalidString,
59 int invalidCodePointIndex) {
60 try {
61 Utf8.encodedLength(invalidString);
62 fail();
63 } catch (IllegalArgumentException expected) {
64 assertEquals("Unpaired surrogate at index " + invalidCodePointIndex,
65 expected.getMessage());
66 }
67 }
68
69
70 private static final long ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS =
71 0x007f - 0x0000 + 1;
72
73
74 private static final long EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT =
75 ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
76
77
78 private static final long TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS =
79 0x07FF - 0x0080 + 1;
80
81
82 private static final long EXPECTED_TWO_BYTE_ROUNDTRIPPABLE_COUNT =
83
84 (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 2) +
85
86 TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS;
87
88
89 private static final long THREE_BYTE_SURROGATES = 2 * 1024;
90
91
92 private static final long THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS =
93 0xFFFF - 0x0800 + 1 - THREE_BYTE_SURROGATES;
94
95
96 private static final long EXPECTED_THREE_BYTE_ROUNDTRIPPABLE_COUNT =
97
98 (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 3) +
99
100 2 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS *
101 ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
102
103 THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS;
104
105
106 private static final long FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS =
107 0x10FFFF - 0x10000L + 1;
108
109
110 private static final long EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT =
111
112 (long) Math.pow(EXPECTED_ONE_BYTE_ROUNDTRIPPABLE_COUNT, 4) +
113
114 2 * THREE_BYTE_ROUNDTRIPPABLE_CHARACTERS *
115 ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
116
117 TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS +
118
119 3 * TWO_BYTE_ROUNDTRIPPABLE_CHARACTERS *
120 ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS *
121 ONE_BYTE_ROUNDTRIPPABLE_CHARACTERS +
122
123 FOUR_BYTE_ROUNDTRIPPABLE_CHARACTERS;
124
125
126
127
128
129
130 public void testIsWellFormed_4BytesSamples() {
131
132 assertWellFormed(0xF0, 0xA4, 0xAD, 0xA2);
133
134 assertNotWellFormed(0xF0, 0xA4, 0xAD, 0x7F);
135 assertNotWellFormed(0xF0, 0xA4, 0xAD, 0xC0);
136
137 assertNotWellFormed(0xF0, 0x8F, 0xAD, 0xA2);
138 assertNotWellFormed(0xF4, 0x90, 0xAD, 0xA2);
139 }
140
141
142 public void testSomeSequences() {
143
144 assertWellFormed();
145
146 assertWellFormed(0x00, 0x61, 0x62, 0x63, 0x7F);
147
148 assertWellFormed(0xC2, 0xA2, 0xC2, 0xA2);
149
150 assertWellFormed(0xc8, 0x8a, 0x63, 0xc8, 0x8a, 0x63);
151
152
153 assertWellFormed(0xc9, 0x8b, 0x36, 0x32, 0xc9, 0x8b, 0x36, 0x32);
154
155
156 assertWellFormed(0x61, 0xc8, 0x8a, 0x63, 0xc2, 0xa2, 0x62, 0x5c, 0x75, 0x30,
157 0x32, 0x34, 0x42, 0x36, 0x32, 0x75, 0x30, 0x32, 0x30, 0x61, 0x63, 0x63,
158 0xc2, 0xa2, 0x64, 0x65, 0xc9, 0x8b, 0x36, 0x32);
159
160 assertNotWellFormed(-1, 0, -1, 0);
161 }
162
163 public void testShardsHaveExpectedRoundTrippables() {
164
165 long actual = 0;
166 for (long expected : generateFourByteShardsExpectedRunnables()) {
167 actual += expected;
168 }
169 assertEquals(EXPECTED_FOUR_BYTE_ROUNDTRIPPABLE_COUNT, actual);
170 }
171
172 private String newString(char... chars) {
173 return new String(chars);
174 }
175
176 private byte[] toByteArray(int... bytes) {
177 byte[] realBytes = new byte[bytes.length];
178 for (int i = 0; i < bytes.length; i++) {
179 realBytes[i] = (byte) bytes[i];
180 }
181 return realBytes;
182 }
183
184 private void assertWellFormed(int... bytes) {
185 assertTrue(Utf8.isWellFormed(toByteArray(bytes)));
186 }
187
188 private void assertNotWellFormed(int... bytes) {
189 assertFalse(Utf8.isWellFormed(toByteArray(bytes)));
190 }
191
192 private static long[] generateFourByteShardsExpectedRunnables() {
193 long[] expected = new long[128];
194
195 for (int i = 0; i <= 63; i++) {
196 expected[i] = 5300224;
197 }
198
199 for (int i = 97; i <= 111; i++) {
200 expected[i] = 2342912;
201 }
202
203 for (int i = 113; i <= 117; i++) {
204 expected[i] = 1048576;
205 }
206
207 expected[112] = 786432;
208 expected[118] = 786432;
209 expected[119] = 1048576;
210 expected[120] = 458752;
211 expected[121] = 524288;
212 expected[122] = 65536;
213
214 return expected;
215 }
216 }
217